1. IMPORT


library(dada2)
packageVersion("dada2") # check dada2 version
## [1] '1.22.0'
library(Biostrings)
library(ShortRead)
library(seqTools) # per base sequence content
library(phyloseq)
library(ggplot2)
library(data.table)
library(plyr)
library(dplyr)
library(qckitfastq) # per base sequence content
library(stringr)

# ROOT DIRECTORY (to modify on your computer)
path.root <- "~/Projects/MetaIBS"
path.fukui  <- file.path(path.root, "scripts/analysis-individual/Fukui-2020")
path.data   <- file.path(path.root, "data/analysis-individual/Fukui-2020") # REPLACE WITH data_empty/analysis-individual/Fukui-2020

2. QUALITY CHECK


2.1. Fastq quality profiles

First, we import the fastq files containing the raw reads. The samples were downloaded from the SRA database with the accession number PRJNA637763.

# Save the path to the directory containing the fastq zipped files
path.fastq <- file.path(path.data, "raw_fastq")
# list.files(path.fastq) # check we are in the right directory

# fastq filenames have format: SAMPLENAME.fastq.gz
# Saves the whole directory path to each file name
fnFs <- sort(list.files(path.fastq, pattern="_1.fastq.gz", full.names = TRUE)) # forward
fnRs <- sort(list.files(path.fastq, pattern="_2.fastq.gz", full.names = TRUE)) # reverse
show(fnFs[1:5])
show(fnRs[1:5])
# length(fnFs) # should be 111
# length(fnRs) # should be 111

# Extract sample names, assuming filenames have format: SAMPLENAME.fastq.gz
sample.names <- sapply(strsplit(basename(fnFs), "_"), `[`, 1)
show(sample.names[1:5]) # saves only the file name (without the path)

# Look at quality of all files
for (i in 1:3){ # 1:length(fnFs)
  show(plotQualityProfile(fnFs[i]))
  show(plotQualityProfile(fnRs[i]))
}

# Look at nb of reads per sample
# raw_stats <- data.frame('sample' = sample.names,
#                         'reads' = fastqq(fnFs)@nReads)
# min(raw_stats$reads) # min 22,487 reads/sample
# max(raw_stats$reads) # max 72,649 reads/sample
# mean(raw_stats$reads) # 41,870 average

We will have a quick peak at the per base sequence content of the reads in some samples, to make sure there is no anomaly (i.e. all reads having the same sequence).

# Look at per base sequence content (forward read)
fseqF <- seqTools::fastqq(fnFs[1])
## [fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944139_1.fastq.gz'   done.
rcF <- read_content(fseqF)
plot_read_content(rcF) + labs(title = "Per base sequence content - Forward read")

# Look at per base sequence content (reverse read)
fseqR <- seqTools::fastqq(fnRs[1])
## [fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944139_2.fastq.gz'   done.
rcR <- read_content(fseqR)
plot_read_content(rcR) + labs(title = "Per base sequence content - Reverse read")

2.2. Look for primers

Now, we will look whether the reads still contain the primers. Primer sequences were provided in the methods section of the paper. The sequences provided in the paper were long, so only the relevant part of the sequences were kept (the part of the sequences corresponding to 27F and 338R)

# V1-V2
FWD <- "TTTGATCCTGGCTCAG" # 27F (end of sequence is base 27)
REV <- "TGCCTCCCGTAGGAGT" # 338R (end of sequence is base 338)

# Function that, from the primer sequence, will return all combinations possible (complement, reverse complement, ...)
allOrients <- function(primer) {
    # Create all orientations of the input sequence
    require(Biostrings)
    dna <- DNAString(primer)  # The Biostrings works w/ DNAString objects rather than character vectors
    orients <- c(Forward = dna, Complement = complement(dna), Reverse = reverse(dna), 
                 RevComp = reverseComplement(dna)) # get all orientations
    return(sapply(orients, toString))
}

# Get all combinations of the primer sequences
FWD.orients <- allOrients(FWD) # 27F
REV.orients <- allOrients(REV) # 338R
FWD.orients # sanity check
##            Forward         Complement            Reverse            RevComp 
## "TTTGATCCTGGCTCAG" "AAACTAGGACCGAGTC" "GACTCGGTCCTAGTTT" "CTGAGCCAGGATCAAA"
REV.orients
##            Forward         Complement            Reverse            RevComp 
## "TGCCTCCCGTAGGAGT" "ACGGAGGGCATCCTCA" "TGAGGATGCCCTCCGT" "ACTCCTACGGGAGGCA"
# Function that counts number of reads in which a sequence is found
primerHits <- function(primer, fn) {
    nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE, max.mismatch = 2)
    return(sum(nhits > 0))
}

# Look in all samples how many times the 27F and 338R primers are found in the reads of each sample
for (i in 1:3){ # 1:length(fnFs)
  cat("SAMPLE", sample.names[i], "with total number of", raw_stats[i,'reads'], "reads\n\n")
  x <- rbind(ForwardRead.FWDPrimer = sapply(FWD.orients, primerHits, fn = fnFs[[i]]),
             ForwardRead.REVPrimer = sapply(REV.orients, primerHits, fn = fnFs[[i]]),
             ReverseRead.FWDPrimer = sapply(FWD.orients, primerHits, fn = fnRs[[i]]), 
             ReverseRead.REVPrimer = sapply(REV.orients, primerHits, fn = fnRs[[i]]))
  print(x)
  cat("\n____________________________________________\n\n")
}
## SAMPLE SRR11944139 with total number of 60876 reads
## 
##                       Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer   60252          0       0       0
## ForwardRead.REVPrimer       8          0       1      55
## ReverseRead.FWDPrimer       9          0       0      52
## ReverseRead.REVPrimer   59993          0       0       0
## 
## ____________________________________________
## 
## SAMPLE SRR11944140 with total number of 22487 reads
## 
##                       Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer   22285          0       0       0
## ForwardRead.REVPrimer       3          0       0      20
## ReverseRead.FWDPrimer       3          0       0      20
## ReverseRead.REVPrimer   22230          0       0       0
## 
## ____________________________________________
## 
## SAMPLE SRR11944141 with total number of 38658 reads
## 
##                       Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer   37148          0       0       0
## ForwardRead.REVPrimer       9          2       1      90
## ReverseRead.FWDPrimer       6          0       0      87
## ReverseRead.REVPrimer   37973          0       2       0
## 
## ____________________________________________

Let’s have a quick look at where primers are positioned in the forward/reverse reads

# Function that gets position in which sequence is found
primerHitsPosition <- function(primer, fn){
  hits <- as.data.frame(vmatchPattern(primer, sread(readFastq(fn)), fixed = FALSE, max.mismatch = 2))
  hits <- hits[,c("group", "start")]
  colnames(hits) <- c("sample", "start")
  hits$sample <- sapply(strsplit(basename(fn), "_"), `[`, 1)
  hits$readslength <- seqTools::fastqq(fn)@maxSeqLen
  return(hits)
}

# Get position of FWD primers
FWDpos <- data.frame()
for(i in 1:3){ # length(fnFs)
  cat("SAMPLE", i)
  new <- primerHitsPosition(FWD.orients["Forward"], fnFs[[i]])
  FWDpos <- rbind(new, FWDpos)
}
## SAMPLE 1[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944139_1.fastq.gz'   done.
## SAMPLE 2[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944140_1.fastq.gz'   done.
## SAMPLE 3[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944141_1.fastq.gz'   done.
ggplot(FWDpos, aes(x=start))+
  geom_density(aes(y=..scaled..)) +
  xlim(c(0,max(FWDpos$readslength)))+
  labs(x="start position of FWD primer", y="proportion of primers starting at x position")

# Get position of REV primers
REVpos <- data.frame()
for(i in 1:3){ # length(fnRs)
  cat("SAMPLE", i)
  new <- primerHitsPosition(REV.orients["Forward"], fnRs[[i]])
  REVpos <- rbind(new, REVpos)
}
## SAMPLE 1[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944139_2.fastq.gz'   done.
## SAMPLE 2[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944140_2.fastq.gz'   done.
## SAMPLE 3[fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Fukui-2020/raw_fastq/SRR11944141_2.fastq.gz'   done.
ggplot(REVpos, aes(x=start))+
  geom_density(aes(y=..scaled..)) +
  xlim(c(0,max(REVpos$readslength)))+
  labs(x="start position of REV primer", y="proportion of primers starting at x position")


3. FILTER AND TRIM


3.1. Primer removal

The reads indeed contain the primers (both FWD and REV primers). We will keep only reads containing primers, and then remove the primers!

# KEEP READS WITH PRIMER AND REMOVE PRIMER+BARCODE
# Place filtered files in a filtered1/ subdirectory
FWD.filt1_samples <- file.path(path.data, "filtered1", paste0(sample.names, "_1_filt1.fastq.gz")) # FWD reads
REV.filt1_samples <- file.path(path.data, "filtered1", paste0(sample.names, "_2_filt1.fastq.gz")) # REV reads
# Assign names for the filtered fastq.gz files
names(FWD.filt1_samples) <- sample.names
names(REV.filt1_samples) <- sample.names

# Keep only reads with primers & remove primers
FWD.out1 <- removePrimers(fn = fnFs, fout = FWD.filt1_samples,
                          primer.fwd = FWD.orients[["Forward"]],
                          trim.fwd = TRUE,
                          orient = FALSE, # re-orient reads if needed
                          compress = TRUE, verbose = TRUE)
REV.out1 <- removePrimers(fn = fnRs, fout = REV.filt1_samples,
                          primer.fwd = REV.orients[["Forward"]],
                          trim.fwd = TRUE,
                          orient = FALSE, # re-orient reads if needed
                          compress = TRUE, verbose = TRUE)
# Primer removal
FWD.out1[1:3,]
##                        reads.in reads.out
## SRR11944139_1.fastq.gz    60876     60252
## SRR11944140_1.fastq.gz    22487     22285
## SRR11944141_1.fastq.gz    38658     37148
REV.out1[1:3,]
##                        reads.in reads.out
## SRR11944139_2.fastq.gz    60876     59993
## SRR11944140_2.fastq.gz    22487     22230
## SRR11944141_2.fastq.gz    38658     37973
# Quality profile after primer removal
for (i in 1:3){
  show(plotQualityProfile(FWD.filt1_samples[i]))
  show(plotQualityProfile(REV.filt1_samples[i]))
}

3.2. Quality filtering

Then, we perform a quality filtering of the reads.

# Place filtered files in a filtered/ subdirectory
FWD.filt2_samples <- file.path(path.data, "filtered2", paste0(sample.names, "_1_filt2.fastq.gz")) # FWD reads
REV.filt2_samples <- file.path(path.data, "filtered2", paste0(sample.names, "_2_filt2.fastq.gz")) # REV reads
# Assign names for the filtered fastq.gz files
names(FWD.filt2_samples) <- sample.names
names(REV.filt2_samples) <- sample.names

# Filter
out2 <- filterAndTrim(fwd = FWD.filt1_samples, filt = FWD.filt2_samples,
                      rev = REV.filt1_samples, filt.rev = REV.filt2_samples,
                      maxEE=3, # reads with more than 3 expected errors (sum(10e(-Q/10))) are discarded
                      truncQ=10, # Truncate reads at the first instance of a quality score less than or equal to truncQ.
                      minLen = 150, # Discard reads shorter than 150 bp. This is done after trimming and truncation.
                      matchIDs = TRUE,
                      id.field = 1, # we don't have standard illumina header, this will help matching paired FWD/REV reads
                      compress=TRUE, multithread = TRUE, verbose=TRUE)

Let’s look at the output filtered fastq files as sanity check.

out2[1:4,] # show how many reads were filtered in each file
##                              reads.in reads.out
## SRR11944139_1_filt1.fastq.gz    60252     50261
## SRR11944140_1_filt1.fastq.gz    22285     19697
## SRR11944141_1_filt1.fastq.gz    37148     33283
## SRR11944142_1_filt1.fastq.gz    46114     37924
# Look at quality profile of all filtered files
for (i in 1:3){
  show(plotQualityProfile(FWD.filt2_samples[i]))
  show(plotQualityProfile(REV.filt2_samples[i]))
}


4. CONSTRUCT ASV TABLE


4.1. Learn error rates

Now we will build the parametric error model, to be able to infer amplicon sequence variants (ASVs) later on.

set.seed(123)
errF <- learnErrors(FWD.filt2_samples, multithread=TRUE, randomize = TRUE, verbose = 1)
set.seed(123)
errR <- learnErrors(REV.filt2_samples, multithread=TRUE, randomize = TRUE, verbose = 1)

The error rates for each possible transition (A→C, A→G, …) are shown. Points are the observed error rates for each consensus quality score. The black line shows the estimated error rates after convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal definition of the Q-score. Here the estimated error rates (black line) are a good fit to the observed rates (points), and the error rates drop with increased quality as expected.

plotErrors(errF, nominalQ = TRUE) # Forward reads

plotErrors(errR, nominalQ = TRUE) # Reverse reads

4.2. Infer sample composition

The dada() algorithm infers sequence variants based on estimated errors (previous step). Firstly, we de-replicate the reads in each sample, to reduce the computation time. De-replication is a common step in almost all modern ASV inference (or OTU picking) pipelines, but a unique feature of derepFastq is that it maintains a summary of the quality information for each dereplicated sequence in $quals.

# Dereplicate the reads in the sample
derepF <- derepFastq(FWD.filt2_samples) # forward
derepR <- derepFastq(REV.filt2_samples) # reverse

# Infer sequence variants
dadaFs <- dada(derepF, err=errF, multithread=TRUE) # forward
dadaRs <- dada(derepR, err=errR, multithread=TRUE) # reverse
# Inspect the infered sequence variants from sample 1:2
for (i in 1:2){
  print(dadaFs[[i]])
  print(dadaRs[[i]])
  print("________________")
}
## dada-class: object describing DADA2 denoising results
## 363 sequence variants were inferred from 12053 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## dada-class: object describing DADA2 denoising results
## 262 sequence variants were inferred from 17014 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## [1] "________________"
## dada-class: object describing DADA2 denoising results
## 170 sequence variants were inferred from 4012 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## dada-class: object describing DADA2 denoising results
## 122 sequence variants were inferred from 5916 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## [1] "________________"

4.3. Merge paired-end reads

mergers <- mergePairs(dadaFs, derepF, dadaRs, derepR, verbose=TRUE)
head(mergers[[1]])

4.4. Construct ASV table

We can now construct an amplicon sequence variant table (ASV) table, a higher-resolution version of the OTU table produced by traditional methods.

# Make sequence table from the infered sequence variants
seqtable <- makeSequenceTable(mergers)

# We should have 111 samples (111 rows)
dim(seqtable)
## [1]   111 10620
# Inspect distribution of sequence lengths
hist(nchar(getSequences(seqtable)), breaks = 100, xlab = "ASV length", ylab = "Number of ASVs", main="")

# Sequences should be between 27F - 338R, so around 311bp (removing the primers lengths).

# Remove any sequence variant below outside 260-360bp
seqtable.new <- seqtable[,nchar(colnames(seqtable)) %in% 260:360]
dim(seqtable.new)
## [1]   111 10363
hist(nchar(getSequences(seqtable.new)), breaks = 100, xlab = "ASV length", ylab = "Number of ASVs", main="")

# check we haven't lost too many counts
sum(seqtable.new)/sum(seqtable)
## [1] 0.990101

4.5. Remove chimeras

The core dada method corrects substitution and indel errors, but chimeras remain. Fortunately, the accuracy of sequence variants after denoising makes identifying chimeric ASVs simpler than when dealing with fuzzy OTUs. Chimeric sequences are identified if they can be exactly reconstructed by combining a left-segment and a right-segment from two more abundant “parent” sequences.

seqtable.nochim <- removeBimeraDenovo(seqtable.new, method="consensus", multithread=TRUE, verbose=TRUE)

# Check how many sequence variants we have after removing chimeras
dim(seqtable.nochim)
## [1]  111 6801
# Check how many reads we have after removing chimeras (we should keep the vast majority of the reads, like > 80%)
sum(seqtable.nochim)/sum(seqtable)
## [1] 0.9665405

5. LOOK AT READS COUNT THROUGH PIPELINE


Sanity check before assigning taxonomy.

# Function that counts nb of reads
getN <- function(x) sum(getUniques(x))

# Table that will count number of reads for each process of interest (input reads, filtered reads, denoised reads, non chimera reads)
track <- cbind(FWD.out1,
               data.frame("primerfiltR"=REV.out1[,2]),
               data.frame("qualityfilt"=out2[,2]),
               sapply(dadaFs, getN),
               sapply(dadaRs, getN),
               sapply(mergers, getN),
               rowSums(seqtable.nochim),
               as.integer(rowSums(seqtable.nochim)*100/FWD.out1[,1]))

# Assign column and row names
colnames(track) <- c("input", "primerfiltF", "primerfiltR", "quality-filt", "denoisedF", "denoisedR", 'merged', 'nonchim', "%input->output")
rownames(track) <- sample.names

# Show final table: for each row/sample, we have shown the initial number of reads, filtered reads, denoised reads, and non chimera reads
track

6. TAXONOMIC TABLE


Extensions: The dada2 package also implements a method to make species level assignments based on exact matching between ASVs and sequenced reference strains. Recent analysis suggests that exact matching (or 100% identity) is the only appropriate way to assign species to 16S gene fragments. Currently, species-assignment training fastas are available for the Silva and RDP 16S databases. To follow the optional species addition step, download the silva_species_assignment_v132.fa.gz file, and place it in the directory with the fastq files.

path.silva <- file.path(path.root, "data/analysis-individual/CLUSTER/taxonomy/silva-taxonomic-ref")

# Assign taxonomy (with silva v138)
taxa <- assignTaxonomy(seqtable.nochim, file.path(path.silva, "silva_nr99_v138.1_train_set.fa.gz"),
                       tryRC = TRUE, # try reverse complement of the sequences
                       multithread=TRUE, verbose = TRUE)

# Add species assignment
taxa <- addSpecies(taxa, file.path(path.silva, "silva_species_assignment_v138.1.fa.gz"))
# Check how the taxonomy table looks like
taxa.print <- taxa
rownames(taxa.print) <- NULL # Removing sequence rownames for display only
head(taxa.print)
##      Kingdom    Phylum             Class            Order              
## [1,] "Bacteria" "Bacteroidota"     "Bacteroidia"    "Bacteroidales"    
## [2,] "Bacteria" "Bacteroidota"     "Bacteroidia"    "Bacteroidales"    
## [3,] "Bacteria" "Actinobacteriota" "Actinobacteria" "Bifidobacteriales"
## [4,] "Bacteria" "Bacteroidota"     "Bacteroidia"    "Bacteroidales"    
## [5,] "Bacteria" "Bacteroidota"     "Bacteroidia"    "Bacteroidales"    
## [6,] "Bacteria" "Bacteroidota"     "Bacteroidia"    "Bacteroidales"    
##      Family               Genus             Species            
## [1,] "Bacteroidaceae"     "Bacteroides"     "vulgatus"         
## [2,] "Bacteroidaceae"     "Bacteroides"     "dorei"            
## [3,] "Bifidobacteriaceae" "Bifidobacterium" "pseudocatenulatum"
## [4,] "Bacteroidaceae"     "Bacteroides"     "uniformis"        
## [5,] "Bacteroidaceae"     "Bacteroides"     "uniformis"        
## [6,] "Bacteroidaceae"     "Bacteroides"     "dorei"
table(taxa.print[,1], useNA="ifany") # Show the different kingdoms
## 
## Bacteria 
##     6801
table(taxa.print[,2], useNA="ifany") # Show the different Phyla
## 
##  Actinobacteriota      Bacteroidota  Campylobacterota     Cyanobacteria 
##               207               966                 4                 5 
##  Desulfobacterota        Firmicutes    Fusobacteriota   Patescibacteria 
##                66              5305                35                 5 
##    Proteobacteria     Spirochaetota      Synergistota Verrucomicrobiota 
##               188                 1                 4                11 
##              <NA> 
##                 4

7. LAST PREPROCESSING


We will remove any sample with less than 500 reads from further analysis, and also any ASVs with unassigned phyla.

7.1. Create phyloseq object

The preprocessing will be easier to do with ASV, taxonomic and metadata tables combined in a phyloseq object.

#________________________
# Import metadata
metadata_table <- read.csv(file.path(path.data, "00_Metadata-Fukui/Metadata-Fukui.csv"), row.names=1)


#_________________________
# Create phyloseq object
physeq <- phyloseq(otu_table(seqtable.nochim, taxa_are_rows=FALSE),
                   sample_data(metadata_table), 
                   tax_table(taxa))

# Remove taxa that are eukaryota, or have unassigned Phyla
physeq <- subset_taxa(physeq, Kingdom != "Eukaryota")
physeq <- subset_taxa(physeq, !is.na(Phylum))
# Remove samples with less than 500 reads
physeq <- prune_samples(sample_sums(physeq)>=500, physeq) # 1 sample discarded
# Some taxa might have been present only in these low-count samples, so we will make sure to remove taxa that are absent in all samples
physeq <- prune_taxa(taxa_sums(physeq)>0, physeq)

7.2. Quick peek at data analysis

# Absolute abundance
# plot_bar(physeq, fill = "Phylum")+ facet_wrap("host_disease", scales="free_x") + theme(axis.text.x = element_blank())

# Relative abundance for Phylum
phylum.table <- physeq %>%
  tax_glom(taxrank = "Phylum") %>%                     # agglomerate at phylum level
  transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
  psmelt()                                             # Melt to long format

ggplot(phylum.table, aes(x = reorder(Sample, Sample, function(x) mean(phylum.table[Sample == x & Phylum == 'Firmicutes', 'Abundance'])),
                         y = Abundance, fill = Phylum))+
  facet_wrap(~ host_disease, scales = "free") + # scales = "free" removes empty lines
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(size = 2, angle = -90))+
  labs(x = "Samples", y = "Relative abundance")

7.3. Save to disk

# Save to disk
saveRDS(raw_stats, file.path(path.data, "01_Dada2-Fukui/raw_stats.rds"))
saveRDS(FWD.out1,  file.path(path.data, "01_Dada2-Fukui/FWD_out1.rds"))
saveRDS(REV.out1,  file.path(path.data, "01_Dada2-Fukui/REV_out1.rds"))
saveRDS(out2,      file.path(path.data, "01_Dada2-Fukui/out2.rds"))
saveRDS(errF,      file.path(path.data, "01_Dada2-Fukui/errF.rds"))
saveRDS(errR,      file.path(path.data, "01_Dada2-Fukui/errR.rds"))
saveRDS(dadaFs,    file.path(path.data, "01_Dada2-Fukui/infered_seq_F.rds"))
saveRDS(dadaRs,    file.path(path.data, "01_Dada2-Fukui/infered_seq_R.rds"))
saveRDS(mergers,   file.path(path.data, "01_Dada2-Fukui/mergers.rds"))

# Taxa & phyloseq object
saveRDS(taxa,   file.path(path.data, "01_Dada2-Fukui/taxa_fukui.rds"))
saveRDS(physeq, file.path(path.root, "data/analysis-individual/CLUSTER/phylotree/input/physeq_fukui.rds"))
saveRDS(physeq, file.path(path.root, "data/phyloseq-objects/phyloseq-without-phylotree/physeq_fukui.rds"))

8. SESSION INFO


sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] stringr_1.5.0               qckitfastq_1.10.0          
##  [3] dplyr_1.1.1                 plyr_1.8.8                 
##  [5] data.table_1.14.8           ggplot2_3.4.2              
##  [7] phyloseq_1.38.0             seqTools_1.28.0            
##  [9] zlibbioc_1.40.0             ShortRead_1.52.0           
## [11] GenomicAlignments_1.30.0    SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0              MatrixGenerics_1.6.0       
## [15] matrixStats_0.63.0          Rsamtools_2.10.0           
## [17] GenomicRanges_1.46.1        BiocParallel_1.28.3        
## [19] Biostrings_2.62.0           GenomeInfoDb_1.30.1        
## [21] XVector_0.34.0              IRanges_2.28.0             
## [23] S4Vectors_0.32.4            BiocGenerics_0.40.0        
## [25] dada2_1.22.0                Rcpp_1.0.10                
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162           bitops_1.0-7           RColorBrewer_1.1-3    
##  [4] tools_4.1.3            bslib_0.4.2            utf8_1.2.3            
##  [7] R6_2.5.1               vegan_2.6-4            mgcv_1.8-42           
## [10] DBI_1.1.3              colorspace_2.1-0       permute_0.9-7         
## [13] rhdf5filters_1.6.0     ade4_1.7-22            withr_2.5.0           
## [16] tidyselect_1.2.0       compiler_4.1.3         cli_3.6.1             
## [19] DelayedArray_0.20.0    labeling_0.4.2         sass_0.4.5            
## [22] scales_1.2.1           RSeqAn_1.14.0          digest_0.6.31         
## [25] rmarkdown_2.21         jpeg_0.1-10            pkgconfig_2.0.3       
## [28] htmltools_0.5.5        highr_0.10             fastmap_1.1.1         
## [31] rlang_1.1.0            rstudioapi_0.14        farver_2.1.1          
## [34] jquerylib_0.1.4        generics_0.1.3         hwriter_1.3.2.1       
## [37] jsonlite_1.8.4         RCurl_1.98-1.12        magrittr_2.0.3        
## [40] GenomeInfoDbData_1.2.7 biomformat_1.22.0      interp_1.1-4          
## [43] Matrix_1.5-1           munsell_0.5.0          Rhdf5lib_1.16.0       
## [46] fansi_1.0.4            ape_5.7-1              lifecycle_1.0.3       
## [49] stringi_1.7.12         yaml_2.3.7             MASS_7.3-58.3         
## [52] rhdf5_2.38.1           grid_4.1.3             parallel_4.1.3        
## [55] crayon_1.5.2           deldir_1.0-6           lattice_0.20-45       
## [58] splines_4.1.3          multtest_2.50.0        knitr_1.42            
## [61] pillar_1.9.0           igraph_1.4.2           reshape2_1.4.4        
## [64] codetools_0.2-19       glue_1.6.2             evaluate_0.20         
## [67] latticeExtra_0.6-30    RcppParallel_5.1.7     png_0.1-8             
## [70] vctrs_0.6.1            foreach_1.5.2          gtable_0.3.3          
## [73] cachem_1.0.7           xfun_0.38              survival_3.5-5        
## [76] tibble_3.2.1           iterators_1.0.14       cluster_2.1.4